##
### GLMMs and GEEs with Epilepsy Data
##

##### Load Required Libraries #####
library(tidyverse)  # ggplot2, tidyr
library(lme4)    # Functions: glmer
library(gee) # Functions: gee

##
#### Read in data set into R:
##
epilepsy <- read.table("http://www.hsph.harvard.edu/fitzmaur/ala2e/epilepsy.dat", 
                       header=FALSE)
names(epilepsy) <-  c("ID","trt","age","Week0","Week2","Week4","Week6","Week8")
epilepsy$trt <- factor(epilepsy$trt, levels=c(0,1), labels=c("Placebo","Progabide"))

## Convert to long form:
epi_long <- pivot_longer(epilepsy,
                         cols = 4:8,
                         names_to = "Time", names_prefix = "Week",
                         values_to = "Count")
epi_long$Time <- as.numeric(epi_long$Time)
  
## Create new variables
epi_long <- epi_long %>% mutate(
  PostBase = as.numeric(Time != 0),
  Weeks = 8*(PostBase==0) + 2*(PostBase==1)
)


##### Exploratory Data Analysis #####

## Number of seizures by group by time period:
boxplot(Count ~ trt+Time, at=c(1,2,4,5,7,8,10,11,13,14), data=epi_long, col=c("red","blue"), ylab="Number of Seizures", xlab="", names=rep(c("Plac","Prog"),5), main="Number of Seizures by Group and Week")
mtext( c("Baseline",paste("Week",c(2,4,6,8))), side=1, at=c(1.5,4.5,7.5,10.5,13.5), line=3 )

# Calculate rates per week:
epi_long <- epi_long %>% mutate(
  Rate = case_when(
    Time == 0 ~ Count/8,
    Time != 0 ~ Count/2
  )
)

library(yarrr)
pirateplot(Count ~ trt+Time, data = epi_long, inf.method = "ci", inf.disp = "line")

pirateplot(Rate ~ trt+Time, data = epi_long, inf.method = "ci", inf.disp = "line")

pirateplot(log1p(Rate) ~ trt+Time, data = epi_long, inf.method = "ci", inf.disp = "line")

# Plot of mean rates:
means <- tapply(epi_long$Rate, list(epi_long$Time,epi_long$trt), mean)
matplot(matrix(c(0,2,4,6,8)), means,
        col=c(1,1), lty=c(3,1), type="o",
        pch=c(1,16), xlab="Time (weeks)",
        ylab="Mean rate of seizures (per week)",
        ylim=c(2.5,5.0),
        main="Mean Rate of Seizures by Treatment Group")
legend(3.5, 3.0, c("Placebo","Progabide"), lty=c(3,1))

## Plots of individual counts:
matplot(matrix(c(0,2,4,6,8)), t(epilepsy[,4:8]),
    col=(as.numeric(epilepsy$trt=="Placebo")+2), type="l", ylim = c(0,180),
    xlab="Week",ylab="No. of Seizures ", main="Response Profiles by ID and Treatment")
legend(5,150,c("Placebo","Progabide"),col=c(2,3),lty=c(1,1))

library(GGally)

epilepsy %>% ggparcoord(4:8, scale = "globalminmax", group = "trt") +
  theme_bw() +
  facet_wrap(~trt)

# Who is the outlier at baseline in the Progabide group?
plot(Count[trt=="Progabide"] ~ Time[trt=="Progabide"],
     xlab="Week", ylab="Seizures",
     main="Counts of Seizures for Progabide Group", col="blue",
     data=epi_long)
points(Count[trt=="Placebo"] ~ Time[trt=="Placebo"], pch=2, 
       col="red", data=epi_long)
identify(epi_long$Count ~ epi_long$Time, labels=epi_long$ID)

## integer(0)
# ID 49 data:
epi_long[epi_long$ID==49,]
## # A tibble: 5 × 8
##      ID trt         age  Time Count PostBase Weeks  Rate
##   <int> <fct>     <int> <dbl> <int>    <dbl> <dbl> <dbl>
## 1    49 Progabide    22     0   151        0     8  18.9
## 2    49 Progabide    22     2   102        1     2  51  
## 3    49 Progabide    22     4    65        1     2  32.5
## 4    49 Progabide    22     6    72        1     2  36  
## 5    49 Progabide    22     8    63        1     2  31.5
# This patient could have a large impact on the analysis -
# Book does analysis with and without ID 49.


##### Fitting GLMMs #####
# Since the number of weeks each count refers to differs
# (8 weeks for baseline by 2 weeks afterwards)
# we need to include an "offset" --> Model mean rate per week

# With glmer (and lmer) function, random effects specified in parentheses:
?lmer
?glmer

## Fit GLMM
epi_long <- epi_long %>% mutate(PostBase = factor(PostBase))

mod1 <- glmer(Count ~ trt*PostBase + (PostBase | ID), offset=log(Weeks),
              family=poisson, data=epi_long)
summary(mod1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: Count ~ trt * PostBase + (PostBase | ID)
##    Data: epi_long
##  Offset: log(Weeks)
## 
##      AIC      BIC   logLik deviance df.resid 
##   1864.4   1890.2   -925.2   1850.4      288 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1394 -0.7073 -0.0620  0.5138  6.9653 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr
##  ID     (Intercept) 0.4999   0.7070       
##         PostBase1   0.2319   0.4816   0.16
## Number of obs: 295, groups:  ID, 59
## 
## Fixed effects:
##                          Estimate Std. Error z value Pr(>|z|)
## (Intercept)             1.0708453  0.1402715   7.634 2.27e-14
## trtProgabide            0.0512167  0.1927136   0.266   0.7904
## PostBase1              -0.0004996  0.1091006  -0.005   0.9963
## trtProgabide:PostBase1 -0.3062158  0.1504205  -2.036   0.0418
## 
## Correlation of Fixed Effects:
##             (Intr) trtPrg PstBs1
## trtProgabid -0.725              
## PostBase1    0.011 -0.013       
## trtPrgb:PB1 -0.014  0.025 -0.709
plot(allEffects(mod1, residuals = T), type = "response", x.var = "PostBase") #Issues in scaling with offset in Poisson rate models

# Estimated random effects
ranef(mod1)
## $ID
##    (Intercept)    PostBase1
## 1  -0.60197367  0.053075955
## 2  -0.60197367  0.053075955
## 3  -0.95584893  0.112780207
## 4  -0.78751955  0.127425353
## 5   0.99154662 -0.116149178
## 6   0.10948363 -0.138443638
## 7  -0.57934911 -0.079468569
## 8   0.81464415  0.566916757
## 9  -0.01965844 -0.032916035
## 10 -0.46624570  0.706002860
## 11  0.78554644  0.243655494
## 12  0.31770399 -0.025038531
## 13 -0.25263814 -0.103699518
## 14  0.55723001  0.033499563
## 15  1.26048139 -0.300455313
## 16  0.64351429 -0.804224609
## 17 -0.38232600 -0.620312840
## 18  1.52980607  0.133760130
## 19 -0.26255897 -0.145316438
## 20 -0.17314632 -0.164548924
## 21 -0.54995950  0.015238657
## 22 -0.71194460  0.132566532
## 23 -0.32687377 -0.202650252
## 24  0.17754869  0.066827808
## 25  0.89477469  0.893345724
## 26 -0.88246585 -0.321623991
## 27 -0.72517425 -0.108837282
## 28  0.67746017  0.137684040
## 29  1.07892366 -0.202875873
## 30  0.41195329  0.028095350
## 31 -0.32260666 -0.412393844
## 32 -0.65611910  0.221362632
## 33 -0.18652492  0.186766964
## 34 -0.07899799 -0.305198729
## 35  0.35362987  0.996654891
## 36 -0.37841126  0.381263969
## 37 -0.65496454  0.021082513
## 38  0.92628753 -0.532610668
## 39  0.48171433 -0.001086379
## 40 -1.07529125 -0.273473810
## 41 -0.21504353 -0.556473937
## 42 -0.51869848  0.063582682
## 43  0.65368382  0.589958949
## 44  0.35793214  0.010241639
## 45  0.44457601  0.295731188
## 46 -0.94074736  0.042165802
## 47  0.37897839  0.173507511
## 48 -0.83298727 -0.495214546
## 49  1.81733174  0.986679568
## 50 -0.12992207 -0.153557314
## 51  0.49339327 -0.103671459
## 52  0.16410222 -0.536852792
## 53  0.82987828  0.497735321
## 54 -0.05966416 -0.204850411
## 55 -0.34384372  0.112038425
## 56  0.06474998  0.892379169
## 57 -0.10209409 -0.626688165
## 58 -0.81580028 -0.856342240
## 59 -0.60188743 -0.013537367
## 
## with conditional variances for "ID"
library(lattice)
dotplot(ranef(mod1, postVar = T))
## $ID

# Subject-specific coefficients (beta_k + b_ki)
coef(mod1)
## $ID
##     (Intercept) trtProgabide    PostBase1 trtProgabide:PostBase1
## 1   0.468871652    0.0512167  0.052576363             -0.3062158
## 2   0.468871652    0.0512167  0.052576363             -0.3062158
## 3   0.114996393    0.0512167  0.112280615             -0.3062158
## 4   0.283325775    0.0512167  0.126925761             -0.3062158
## 5   2.062391945    0.0512167 -0.116648770             -0.3062158
## 6   1.180328951    0.0512167 -0.138943230             -0.3062158
## 7   0.491496211    0.0512167 -0.079968161             -0.3062158
## 8   1.885489473    0.0512167  0.566417165             -0.3062158
## 9   1.051186885    0.0512167 -0.033415627             -0.3062158
## 10  0.604599627    0.0512167  0.705503268             -0.3062158
## 11  1.856391768    0.0512167  0.243155901             -0.3062158
## 12  1.388549310    0.0512167 -0.025538124             -0.3062158
## 13  0.818207179    0.0512167 -0.104199110             -0.3062158
## 14  1.628075335    0.0512167  0.032999971             -0.3062158
## 15  2.331326719    0.0512167 -0.300954905             -0.3062158
## 16  1.714359615    0.0512167 -0.804724201             -0.3062158
## 17  0.688519325    0.0512167 -0.620812432             -0.3062158
## 18  2.600651394    0.0512167  0.133260538             -0.3062158
## 19  0.808286356    0.0512167 -0.145816030             -0.3062158
## 20  0.897698999    0.0512167 -0.165048516             -0.3062158
## 21  0.520885828    0.0512167  0.014739065             -0.3062158
## 22  0.358900720    0.0512167  0.132066939             -0.3062158
## 23  0.743971552    0.0512167 -0.203149844             -0.3062158
## 24  1.248394014    0.0512167  0.066328216             -0.3062158
## 25  1.965620013    0.0512167  0.892846132             -0.3062158
## 26  0.188379477    0.0512167 -0.322123583             -0.3062158
## 27  0.345671073    0.0512167 -0.109336875             -0.3062158
## 28  1.748305496    0.0512167  0.137184448             -0.3062158
## 29  2.149768982    0.0512167 -0.203375466             -0.3062158
## 30  1.482798609    0.0512167  0.027595757             -0.3062158
## 31  0.748238663    0.0512167 -0.412893437             -0.3062158
## 32  0.414726224    0.0512167  0.220863039             -0.3062158
## 33  0.884320408    0.0512167  0.186267372             -0.3062158
## 34  0.991847334    0.0512167 -0.305698321             -0.3062158
## 35  1.424475195    0.0512167  0.996155299             -0.3062158
## 36  0.692434064    0.0512167  0.380764376             -0.3062158
## 37  0.415880782    0.0512167  0.020582920             -0.3062158
## 38  1.997132852    0.0512167 -0.533110260             -0.3062158
## 39  1.552559658    0.0512167 -0.001585971             -0.3062158
## 40 -0.004445929    0.0512167 -0.273973402             -0.3062158
## 41  0.855801789    0.0512167 -0.556973529             -0.3062158
## 42  0.552146840    0.0512167  0.063083090             -0.3062158
## 43  1.724529148    0.0512167  0.589459357             -0.3062158
## 44  1.428777466    0.0512167  0.009742047             -0.3062158
## 45  1.515421337    0.0512167  0.295231596             -0.3062158
## 46  0.130097959    0.0512167  0.041666210             -0.3062158
## 47  1.449823713    0.0512167  0.173007919             -0.3062158
## 48  0.237858057    0.0512167 -0.495714138             -0.3062158
## 49  2.888177063    0.0512167  0.986179975             -0.3062158
## 50  0.940923249    0.0512167 -0.154056907             -0.3062158
## 51  1.564238596    0.0512167 -0.104171052             -0.3062158
## 52  1.234947547    0.0512167 -0.537352384             -0.3062158
## 53  1.900723600    0.0512167  0.497235728             -0.3062158
## 54  1.011181162    0.0512167 -0.205350004             -0.3062158
## 55  0.727001607    0.0512167  0.111538833             -0.3062158
## 56  1.135595306    0.0512167  0.891879577             -0.3062158
## 57  0.968751233    0.0512167 -0.627187757             -0.3062158
## 58  0.255045048    0.0512167 -0.856841832             -0.3062158
## 59  0.468957890    0.0512167 -0.014036959             -0.3062158
## 
## attr(,"class")
## [1] "coef.mer"
# Treating time as quantitative:
mod2 <- glmer(Count ~ trt*Time + (Time | ID), offset=log(Weeks),
                family=poisson, data=epi_long)

summary(mod2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: Count ~ trt * Time + (Time | ID)
##    Data: epi_long
##  Offset: log(Weeks)
## 
##      AIC      BIC   logLik deviance df.resid 
##   1924.2   1950.0   -955.1   1910.2      288 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3786 -0.7228 -0.1173  0.5846  6.6309 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr
##  ID     (Intercept) 0.526863 0.72585      
##         Time        0.005029 0.07091  0.22
## Number of obs: 295, groups:  ID, 59
## 
## Fixed effects:
##                   Estimate Std. Error z value Pr(>|z|)
## (Intercept)        1.10395    0.14260   7.741 9.83e-15
## trtProgabide       0.01750    0.19632   0.089   0.9290
## Time              -0.01133    0.01681  -0.674   0.5004
## trtProgabide:Time -0.04669    0.02335  -2.000   0.0456
## 
## Correlation of Fixed Effects:
##             (Intr) trtPrg Time  
## trtProgabid -0.724              
## Time         0.065 -0.053       
## trtPrgbd:Tm -0.054  0.074 -0.694
plot(allEffects(mod2, residuals = T), type = "link")

# How does the interpretation of coefficients change?
# Note larger AIC


##### Fitting Marginal Models #####
?gee
mod.gee <- gee(Count ~ trt*PostBase + offset(log(Weeks)),
               id = ID, family = poisson(link = "log"), 
               corstr = "exchangeable", data = epi_long)
##            (Intercept)           trtProgabide              PostBase1 
##             1.34760922             0.02753449             0.11183602 
## trtProgabide:PostBase1 
##            -0.10472579
# corstr="exchangeable" --> compound symmetry covariance structure.
# family=poisson --> Poisson variance function (not distribution)
summary(mod.gee)
## 
##  GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
##  gee S-function, version 4.13 modified 98/01/27 (1998) 
## 
## Model:
##  Link:                      Logarithm 
##  Variance to Mean Relation: Poisson 
##  Correlation Structure:     Exchangeable 
## 
## Call:
## gee(formula = Count ~ trt * PostBase + offset(log(Weeks)), id = ID, 
##     data = epi_long, family = poisson(link = "log"), corstr = "exchangeable")
## 
## Summary of Residuals:
##        Min         1Q     Median         3Q        Max 
##  -4.303571  -1.303571   2.016129  10.370392 147.044355 
## 
## 
## Coefficients:
##                           Estimate Naive S.E.    Naive z Robust S.E.   Robust z
## (Intercept)             1.34760922  0.1510969  8.9188397   0.1573571  8.5640166
## trtProgabide            0.02753449  0.2071018  0.1329515   0.2217878  0.1241479
## PostBase1               0.11183602  0.1545145  0.7237900   0.1159304  0.9646821
## trtProgabide:PostBase1 -0.10472579  0.2197052 -0.4766650   0.2134448 -0.4906459
## 
## Estimated Scale Parameter:  19.6797
## Number of Iterations:  1
## 
## Working Correlation
##           [,1]      [,2]      [,3]      [,4]      [,5]
## [1,] 1.0000000 0.7713861 0.7713861 0.7713861 0.7713861
## [2,] 0.7713861 1.0000000 0.7713861 0.7713861 0.7713861
## [3,] 0.7713861 0.7713861 1.0000000 0.7713861 0.7713861
## [4,] 0.7713861 0.7713861 0.7713861 1.0000000 0.7713861
## [5,] 0.7713861 0.7713861 0.7713861 0.7713861 1.0000000
mod.gee2 <- gee(Count ~ trt*PostBase + offset(log(Weeks)),
                id = ID, family = poisson(link = "log"), 
                corstr = "AR-M", data = epi_long)
##            (Intercept)           trtProgabide              PostBase1 
##             1.34760922             0.02753449             0.11183602 
## trtProgabide:PostBase1 
##            -0.10472579
summary(mod.gee2)
## 
##  GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
##  gee S-function, version 4.13 modified 98/01/27 (1998) 
## 
## Model:
##  Link:                      Logarithm 
##  Variance to Mean Relation: Poisson 
##  Correlation Structure:     AR-M , M = 1 
## 
## Call:
## gee(formula = Count ~ trt * PostBase + offset(log(Weeks)), id = ID, 
##     data = epi_long, family = poisson(link = "log"), corstr = "AR-M")
## 
## Summary of Residuals:
##        Min         1Q     Median         3Q        Max 
##  -4.327892  -1.327892   2.120474  10.440487 147.208867 
## 
## 
## Coefficients:
##                           Estimate Naive S.E.    Naive z Robust S.E.
## (Intercept)             1.31279985  0.1427491  9.1965551   0.1617122
## trtProgabide            0.01986517  0.1960117  0.1013468   0.2117125
## PostBase1               0.15228086  0.1682744  0.9049558   0.1114624
## trtProgabide:PostBase1 -0.12923296  0.2405021 -0.5373464   0.2597892
##                           Robust z
## (Intercept)             8.11812310
## trtProgabide            0.09383086
## PostBase1               1.36620870
## trtProgabide:PostBase1 -0.49745325
## 
## Estimated Scale Parameter:  20.12528
## Number of Iterations:  3
## 
## Working Correlation
##           [,1]      [,2]      [,3]      [,4]      [,5]
## [1,] 1.0000000 0.8102249 0.6564644 0.5318838 0.4309455
## [2,] 0.8102249 1.0000000 0.8102249 0.6564644 0.5318838
## [3,] 0.6564644 0.8102249 1.0000000 0.8102249 0.6564644
## [4,] 0.5318838 0.6564644 0.8102249 1.0000000 0.8102249
## [5,] 0.4309455 0.5318838 0.6564644 0.8102249 1.0000000
# Note similar coefficient estimates and Wald tests
# GEE std. errs robust to covariance structure assumption